#
# summarizePosteriorSamples.R
#
# Summarize posterior beliefs about polarization
# given MCMC samples.
#
# Hill, Seth J. and Chris Tausanovitch. "A Disconnect in Representation? Comparison of Trends in Congressional and Public Polarization."
#

library(bit64)
library(data.table)
options(stringsAsFActors=F)

# Parameters.
if (!"run" %in% ls()) { # If not passed from parent script.
  run <- "full" # which run to analyze. choose from c("senate","full","ordered","social","unglued").
}
cat("\nSummarizing results for",run,"estimation run.\n\n")
thin <- 2 # further thinning of posterior draws.
theshold <- 95 # percent interior to define as threshhold for polarization in replacement figure.

if (run == "senate") {
  # Call in data.
  cat("Working on Senate...\n");flush.console()
  load("sample_output/senateBig10k.RData")
  cat("Called in data at",paste(Sys.time()),"\n");flush.console()
  jdata <- merged; rm(merged) # for similar naming
  the.weights <- rep(1,jdata$n)
} else {
  # Call in weights from original data.
  cat("Working on ANES...\n");flush.console()
  if (run == "full") {
    load("anes_cdf_extra_50s60s.RData")
  } else if (run == "ordered") {
    load("anes_cdf_extra_50s60s.RData")
    # Drop records for cases in 1948, 1952, and 1954 so that matches
    # the subsample list of cases passed in jdata below. 
    # [MCMCpack ordered model can't handle full data.]
    anes.cdf.with.2012 <- anes.cdf.with.2012[anes.cdf.with.2012$VCF0004 > 1955,]
  } else if (run == "social") {
    load("anes_cdf_extra_50s60s_social_only.RData")
    # Limit only to 1980 and later.
    anes.cdf.with.2012 <- anes.cdf.with.2012[anes.cdf.with.2012$VCF0004 > 1979,]
  } else if (run == "unglued") {
    load("anes_cdf_extra_50s60s_less_glue.RData")
  }
  # Create year and weight variables.
  year <- anes.cdf.with.2012$VCF0004
  the.weights <- anes.cdf.with.2012$VCF0009a[year >= 1956] # post-stratification weights.
  cat("Grabbed sample weights at",paste(Sys.time()),"\n");flush.console()
  rm(anes.cdf.with.2012)

  # Call in posterior samples.
  if (run == "full") {
    load("sample_output/jagsoutFull25kchain1Norm.RData")
    thin <- 1 # 25K normed output is already thinned by 10 to 2,500. don't thin more.
  } else if (run == "ordered") {
    load("sample_output/jagsoutFull2kOrd.RData")
    # Ordered run only on a subsample. Update year and weights.
    the.weights <- the.weights[jdata$sampled.obs]
    jdata$yearparty <- jdata$yearparty[jdata$sampled.obs]
  } else if (run == "social") {
    load("sample_output/jagsoutSocialNorm2k.RData")
  } else if (run == "unglued") {
    load("sample_output/jagsoutUngluedNorm10k.RData")
  }
  cat("Called in data at",paste(Sys.time()),"\n");flush.console()
}

# Turn the posterior x's into a long data.table for easier manipulation.
if (run == "ordered") { # MCMCpack output different.
  X <- as.matrix(output[,grep("phi",colnames(output))]) # grab ideal points
} else {
  X <- as.matrix(output$x)
}
cat(" Found",nrow(X),"posterior samples of each parameter...\n");flush.console()
if (thin > 1) {
  cat("Thinning posterior samples by factor of",thin,"...\n")
  X <- X[1:nrow(X) %% thin == 0,]
}
cat(" Using",nrow(X),"samples for each parameter.\n")

# Check that length of weights vector equals jdata$n.
if (length(the.weights) != jdata$n) {
  stop("Weight vector of different length than number of respondents in estimation.\n")
}
DT <- data.table(id=rep(1:jdata$n,nrow(X)),weight=rep(the.weights,nrow(X)),
                 iter=rep(1:nrow(X),each=jdata$n),x=c(t(X)))
setkey(DT,id)
rm(output,X,the.weights);gc()

if (run == "senate") {
  # Create data.table mapping yearparty to year and party.
  yp <- data.table(merge(data.frame(cong=seq(82,113)),data.frame(pid3=c("D","R","Indep"))))
  yp[,yearparty := seq(1,.N)] ; yp[,year := 1787 + 2*cong]

  # Because the same senator served in multiple Congresses, have to cross-join yearparty
  # to each iteration summarizing a senator in that yearparty.
  
  # First, create a data.table of senator id to year and pid3.
  legis.dat <- data.table(jdata$legis.data)
  Z <- NULL
  for (cng in seq(82,113)) {
    # Grab all the legislator ids for this congress.
    this.cong <- legis.dat[!is.na(legis.dat[[sprintf("s%s",cng)]]),c("id","party"),with=F]
    this.cong[,pid3 := as.character(party)]
    this.cong[,cong := cng]
    # Add to data.table Z.
    Z <- rbindlist(list(Z,this.cong))
  }
  rm(this.cong,cng)
  Z[,party := NULL] # using variable name pid3
  Z[pid3 == "Conservative",pid3 := "Indep"] # just one legislator
  # Merge yearparty and year to each row of Z.
  Z <- merge(Z,yp[,c("yearparty","year","cong","pid3"),with=F],by=c("cong","pid3"),all.x=T,all.y=F)
  setkey(Z,id)
  # Cross-join yearparty and year to each id on each iteration.
  DT <- merge(DT,Z,by="id",all.x=T,all.y=F,allow.cartesian=T) # DT now has year and party
  setkeyv(DT,c("year","yearparty","id","iter"))
  cat("Converted data to long data.table at",paste(Sys.time()),"\n");flush.console()
} else {
  # Create data.table mapping yearparty to year and party.
  yp <- data.table(as.data.frame(jdata$codes,stringsAsFactors=F)) # map from yearparty to year and party
  yp[,yearparty := as.numeric(as.character(yearparty))]; yp[,year := as.numeric(as.character(year))]

  # Merge yearparty and year to each id on each iteration.
  Z <- data.table(id=1:jdata$n,yearparty=jdata$yearparty) # map from id to yearparty
  Z <- merge(Z,yp[,c("yearparty","year"),with=F],by="yearparty")
  setkey(Z,id)
  DT <- merge(DT,Z,by="id",all.x=T,all.y=F) # DT now has year and party
  setkeyv(DT,c("year","yearparty","id","iter"))
  cat("Converted data to long data.table at",paste(Sys.time()),"\n");flush.console()
}

if (DT[,any(is.na(year))]) {
  flag <- DT[,is.na(year)]
  cat(" Dropping",sum(flag),"posterior samples for ids",paste(unique(DT[flag,id]),collapse=","),
      "without a match to year...\n")
  DT <- DT[!flag,]
}

# If this is a subsetted run, open the anes data,
# grab the subset variable, and subset DT
if ("subset_type" %in% ls()){
    if(subset_type=="education"){
        load("anes_cdf_extra_50s60s.RData")
        minyear <- 1956
        data <- anes.cdf.with.2012
        year  <- data$VCF0004
        data <- data[year >= minyear ,]
        education <- data$VCF0110
        college <- which(education==4)
        DT <- DT[DT$id %in% college,]
        rm(data)
        rm(year)
        rm(anes.cdf.with.2012)
    }
    if(subset_type=="influence"){
        load("anes_cdf_extra_50s60s.RData")
        minyear <- 1956
        data <- anes.cdf.with.2012
        year  <- data$VCF0004
        data <- data[year >= minyear ,]
        influence <- data$VCF0717
        yeses <- which(influence==2)
        DT <- DT[DT$id %in% yeses,]
        rm(data)
        rm(year)
        rm(anes.cdf.with.2012)
    }
    if(subset_type=="interest"){
        load("anes_cdf_extra_50s60s.RData")
        minyear <- 1956
        data <- anes.cdf.with.2012
        year  <- data$VCF0004
        data <- data[year >= minyear ,]
        interest <- data$VCF0310
        verymuch <- which(interest==3)
        DT <- DT[DT$id %in% verymuch,]
        rm(data)
        rm(year)
        rm(anes.cdf.with.2012)
    }
}


# Open pdf graphics device with appropriate moniker.
if (run == "full") {
  nm <- "Full"
} else if (run == "ordered") {
  nm <- "FullOrdered"
} else if (run == "social") {
  nm <- "Social"
} else if (run == "unglued") {
  nm <- "Unglued"
} else if (run == "senate") {
  nm <- "Senate"
}
if ("subset_type" %in% ls()){nm <- paste(nm,subset_type,sep="")}
pdf(sprintf("graphs/SummaryPlots%s.pdf",nm),width=10,height=8)
par(mar=c(4.1,4.1,1.1,1.1))

#
# Histogram by year.
#
library(matrixStats) # for weightedMedian().
x.meds <- DT[,list(x=weightedMedian(x=x,w=weight,method="quick")),by=c("id","yearparty")]
# Join year and party.
x.meds <- merge(x.meds,yp,by="yearparty",all.x=T,all.y=F)
cat("Calculated posterior medians for each respondent at",paste(Sys.time()),"\n");flush.console()
par.old <- par(mfrow=c(4,ceiling(length(unique(x.meds$year))/4)),mar=c(.5,.5,1,.5),oma=c(0,0,2.1,0))
full.hist <- hist(x.meds$x,plot=F,breaks=20)
my.hist <- function(x,breaks,axis.at=NULL,plot.prop=T,...) {
  if (plot.prop) {
    blah <- hist(x,breaks=breaks,plot=F)
    blah$density <- blah$counts/sum(blah$counts)
    plot(blah,axes=F,ann=F,freq=F,...)
  } else {
    blah <- hist(x,breaks=breaks,axes=F,ann=F,...)
  }
  if (!is.null(axis.at)) { # plot ticks on x-axis at at.
    axis(1,at=axis.at,labels=F,line=-.5,lwd=2)
  }
}
ylim <- c(0,.35) # common y-axis.
if (run == "senate") ylim <- c(0,.2)
for (yr in sort(unique(x.meds$year))) {
  x.meds[year == yr,my.hist(x,breaks=full.hist$breaks,ylim=ylim)]
  title(main=paste(yr),line=.25,cex=.8)
}
#title(main="Distribution of individual posterior median ideal points by year",outer=T,line=1)
par(par.old)

#
# Histogram of all draws by year.
#
cat("Making histogram of all draws by year at",paste(Sys.time()),"\n");flush.console()
par.old <- par(mfrow=c(4,ceiling(length(unique(x.meds$year))/4)),mar=c(.5,.5,1,.5),oma=c(0,0,2.1,0))
full.hist <- hist(DT[,x],plot=F,breaks=20)
ylim <- c(0,.25)
for (yr in sort(unique(x.meds$year))) {
  DT[yearparty %in% yp[year==yr,yearparty],my.hist(x,breaks=full.hist$breaks,ylim=ylim)]
  title(main=paste(yr),line=.25,cex=.8)
  cat(yr,"...");flush.console()
}
cat("\n")
#title(main="Distribution of all sampled ideal points by year",outer=T,line=1)
par(par.old)

#
# Standard deviation by year.
#
library(Hmisc) # for wtd.var
y.sds <- DT[,list(x=sqrt(wtd.var(x=x,weights=weight,normwt=T))),
            by=c("iter","year")] # year sds on each iter
y2 <- y.sds[,list(lb=quantile(x,.025),med=median(x),ub=quantile(x,.975)),
               by=c("year")] # median and 95 percent CI of year sds.
cat("Calculated posterior year standard deviations at",paste(Sys.time()),"\n");flush.console()
y2[,plot(x=year,y=med,ylim=range(c(.75,ub,lb,1.2)),xlim=range(c(1956,year)),type='n',ann=F,axes=F)]
abline(h=1,lty=2,lwd=2,col="gray")
y2[,segments(x0=year,x1=year,y0=lb,y1=ub)]
y2[,points(x=year,y=med,pch=19,cex=1.5)]
axis(1) ; axis(2,las=2)
title(xlab="Year",ylab="Standard deviation of ideal points by year")

#
# Posterior distribution of standard deviations versus standard deviation of 
# posterior medians by year.
#
y22 <- merge(y2,x.meds[,list(sd.post=sd(x)),by="year"],by="year")
# Median posteriors and CI.
y22[,plot(x=year,y=med,ylim=range(c(sd.post,ub,lb)),xlim=range(c(1956,year)),type='n',ann=F,axes=F)]
y22[,segments(x0=year,x1=year,y0=lb,y1=ub)]
y22[,points(x=year,y=med,pch=19,cex=1.5)]
# SD of posterior medians.
y22[,points(x=year+.5,y=sd.post,pch=15,col="darkgray",cex=1.5)]
# Linear fits.
lm.fit <- function(y,x,...) {
  # Create a plot a linear fit of y on x, limiting line to range of x.
  fit <- lm(y ~ x)
  p   <- predict(fit)
  segments(x0=x[which.min(x)],x1=x[which.max(x)],
           y0=p[which.min(x)],y1=p[which.max(x)],...)
}
y22[,lm.fit(y=med,x=year,lty=2,lwd=2)]
y22[,lm.fit(y=sd.post,x=year+.5,lty=3,lwd=2,col="darkgray")]
legend(ifelse(run=="social","bottomleft","bottomright"),pch=c(15,19),col=c("darkgray","black"),
        legend=c("Standard deviation of posterior medians",
        "Posterior median standard deviation"))
axis(1) ; axis(2,las=2)
title(xlab="Year",ylab="Standard deviation of ideal points by year")

#
# Proportion of respondents at year t+1 outside of interior theshold percent of year t
# distribution of ideal points.
#
library(Hmisc) # for wtd.quantile.
probs <- c((1-(theshold/100))/2, 1 - (1-(theshold/100))/2)
interior.thresh <- DT[,list(lb=wtd.quantile(x,weights=weight,probs=probs[1]),
                        ub=wtd.quantile(x,weights=weight,probs=probs[2])),
                        by=c("iter","year")]
outside.thresh <- NULL # Result data.table.
# Loop over years.
years <- sort(unique(yp[,year]))
for (yr in 2:length(years)) {
  cat("Calculating percent outside previous",theshold,"percent interior for",years[yr],"\n");flush.console()
  # Merge lb and ub to this yr by iter. Weighted.
  sub.dt <- merge(DT[year == years[yr],c("iter","x","weight"),with=F],
                  interior.thresh[year == years[(yr-1)],c("iter","lb","ub"),with=F],
                  by="iter",all.x=T,all.y=F)
  # Calculate cases outside previous 95 pct interior.
  sub.dt[,outside.interior := (x < lb | x > ub)]
  # Proportion outsider previous interior by iteration. Weighted.
  res <- sub.dt[,list(p.outside.interior=wtd.mean(x=outside.interior,weights=weight,na.rm=T)),
                by=c("iter")]
  res[,year := years[yr]]
  # Add proportions to overs.
  outside.thresh <- rbindlist(list(outside.thresh,res))
}
rm(sub.dt,res)
# Plot.
outs2 <- outside.thresh[,list(lb=quantile(p.outside.interior,.025,na.rm=T),
                        med=median(p.outside.interior,na.rm=T),ub=quantile(p.outside.interior,.975,na.rm=T)),
                        by=c("year")] # median and 95 percent CI of proportion outside.
cat("Calculated proportion outside previous interval at",paste(Sys.time()),"\n");flush.console()
outs2[,plot(x=year,y=med,ylim=c(.9,1.1)*range(c(ub,lb)),xlim=range(c(year,1956)),type='n',ann=F,axes=F)]
abline(h=1-(theshold/100),lty=2,lwd=2,col="gray") # line at null.
outs2[,segments(x0=year,x1=year,y0=lb,y1=ub)]
outs2[,points(x=year,y=med,pch=19,cex=1.5)]
axis(1) ; axis(2,las=2)
title(xlab="Year",
      ylab=paste("Proportion outside previous year's interior",theshold,"percent"))

#
# Medians by year and party. Weighted.
#
library(matrixStats) # for weightedMedian().
#py.meds <- DT[,list(x=median(x)),by=c("iter","yearparty")] # party-year medians on each iter
py.meds <- DT[,list(x=weightedMedian(x=x,w=weight,method="quick")),
              by=c("iter","yearparty")] # party-year medians on each iter
# Join year and party.
py.meds <- merge(py.meds,yp,by="yearparty",all.x=T,all.y=F)
py <- py.meds[,list(lb=quantile(x,.025),med=median(x),ub=quantile(x,.975)),
               by=c("year","pid3")] # median and 95 percent CI of party-year medians.
if (run == "senate") {
  # Don't plot third parties.
  py <- py[pid3 %in% c("D","R"),]
}
cat("Calculated posterior party-year medians at",paste(Sys.time()),"\n");flush.console()
py[,yp := ifelse(pid3=="D",year-.5,ifelse(pid3=="R",year+.5,year))]
py[,plot(x=yp,y=med,ylim=range(c(-1,1,ub,lb)),xlim=range(c(1956,yp)),type='n',ann=F,axes=F)]
py[,segments(x0=yp,x1=yp,y0=lb,y1=ub)]
py[,points(x=yp,y=med,pch=ifelse(pid3=="D",19,ifelse(pid3=="R",15,17)),cex=1.5,
           col=ifelse(pid3=="D","blue",ifelse(pid3=="R","red","black")))]
axis(1) ; axis(2,las=2)
title(xlab="Year",ylab="Party median by year")
if (run == "senate") {
  legend("bottomleft",pch=c(19,15),col=c("blue","red"),legend=c("D","R"))
} else {
  legend("bottomleft",pch=c(19,15,17),col=c("blue","red","black"),legend=c("D","R","I"))
}
cat("Gap between party medians by year:\n")
py.gap <- merge(py[pid3 == "D",c("year","med"),with=F],py[pid3 == "R",c("year","med"),with=F],
                by="year",suffixes=c(".d",".r"))
py.gap[,gap := med.r - med.d]
setkey(py.gap,gap)
print(py.gap[,c("year","gap"),with=F])

#
# Standard dev by year and party. Weighted.
#
library(Hmisc) # for wtd.var
py.sds <- DT[,list(x=sqrt(wtd.var(x=x,weights=weight,normwt=T))),
            by=c("iter","yearparty")] # party-year sds on each iter
py.sds <- py.sds[!is.na(x),] # for senate, in congresses with only one independent, no variance.            
# Join year and party.
py.sds <- merge(py.sds,yp,by="yearparty",all.x=T,all.y=F)
py2 <- py.sds[,list(lb=quantile(x,.025),med=median(x),ub=quantile(x,.975)),
               by=c("year","pid3")] # median and 95 percent CI of party-year sds.
if (run == "senate") {
  # Don't plot third parties.
  py2 <- py2[pid3 %in% c("D","R"),]
}
cat("Calculated posterior party-year standard deviations at",paste(Sys.time()),"\n");flush.console()
py2[,yp := ifelse(pid3=="D",year-.5,ifelse(pid3=="R",year+.5,year))]
py2[,plot(x=yp,y=med,ylim=range(c(.3,ub,lb,1.2)),xlim=range(c(1956,yp)),type='n',ann=F,axes=F)]
py2[,segments(x0=yp,x1=yp,y0=lb,y1=ub)]
py2[,points(x=yp,y=med,pch=ifelse(pid3=="D",19,ifelse(pid3=="R",15,17)),cex=1.5,
           col=ifelse(pid3=="D","blue",ifelse(pid3=="R","red","black")))]
axis(1) ; axis(2,las=2)
title(xlab="Year",ylab="Party standard deviation by year")
if (run == "senate") {
  legend("bottomleft",pch=c(19,15),col=c("blue","red"),legend=c("D","R"))
} else {
  legend("bottomleft",pch=c(19,15,17),col=c("blue","red","black"),legend=c("D","R","I"))
}

#
# Party overlap by year. Weighted.
#
if (F) { # actual min/max
  mins.maxs <- DT[,list(min.x=min(x),max.x=max(x)),by=c("iter","yearparty")]
} else { # quantiles
  mins.maxs <- DT[,list(min.x=wtd.quantile(x,weights=weight,probs=.05),
                        max.x=wtd.quantile(x,weights=weight,probs=.95)),by=c("iter","yearparty")]
}
# Join year and party.
mins.maxs <- merge(mins.maxs,yp,by="yearparty",all.x=T,all.y=F)
# Data.table of maximum Dem values by year and iteration.
max.Ds <- mins.maxs[pid3 == "D",c("iter","year","max.x"),with=F]
setnames(max.Ds,gsub("max.x","max.D",names(max.Ds)))
# Cross join with yearparty so that it can merge back to DT.
max.Ds <- merge(max.Ds,yp[,c("year","yearparty"),with=F],by="year",allow.cartesian=T)
max.Ds[,year := NULL]
setkeyv(max.Ds,c("yearparty","iter"))
# Data.table of minimum Rep values by year and iteration.
min.Rs <- mins.maxs[pid3 == "R",c("iter","year","min.x"),with=F]
setnames(min.Rs,gsub("min.x","min.R",names(min.Rs)))
# Cross join with yearparty so that it can merge back to DT.
min.Rs <- merge(min.Rs,yp[,c("year","yearparty"),with=F],by="year",allow.cartesian=T)
min.Rs[,year := NULL]
setkeyv(min.Rs,c("yearparty","iter"))
# Join those two together.
mins.maxs <- merge(max.Ds,min.Rs,by=c("iter","yearparty"))
if (any(mins.maxs[,min.R > max.D])) {
  warning("Some minimum R points greater than maximum D points. No one would be in such an interval. See
  object mins.maxs.\n")
}

overs <- NULL # Result data.table.
# Loop over yearparties.
yearparties <- sort(c(yp[,yearparty])) #c(yp[pid3 != "I",yearparty])

for (yearpart in yearparties) {
  cat("Calculating percent overlap for yearparty",yearpart,"\n");flush.console()
  # Merge mins and maxes to this yearparty. Weighted.
  sub.dt <- merge(DT[yearparty == yearpart,c("iter","x","yearparty","weight"),with=F],
                  mins.maxs[yearparty == yearpart,c("iter","max.D","min.R"),with=F],
                  by="iter",all.x=T,all.y=F)
  # Calculate cases within overlap region.
  sub.dt[,within.overlap := (min.R <= x & x <= max.D)]
  # Proportion within overlap by yearparty and iteration. Weighted.
  res <- sub.dt[,list(p.in.overlap.region=wtd.mean(x=within.overlap,weights=weight,na.rm=T)),
                by=c("yearparty","iter")]
  # Add proportions to overs.
  overs <- rbindlist(list(overs,res))
}
rm(sub.dt)
# Remove missing.
overs <- overs[!is.na(p.in.overlap.region),]

# Join year and party.
overs <- merge(overs,yp,by="yearparty",all.x=T,all.y=F)
# Calculate posterior median and credible intervals.
overs2 <- overs[,list(lb=quantile(p.in.overlap.region,.025),med=median(p.in.overlap.region),
                      ub=quantile(p.in.overlap.region,.975)),
                 by=c("year","pid3")]
if (run == "senate") {
  # Don't plot third parties.
  overs2 <- overs2[pid3 %in% c("D","R"),]
}
cat("Calculated posterior party-year overlap proportions at",paste(Sys.time()),"\n");flush.console()
overs2[,yp := ifelse(pid3=="D",year-.5,ifelse(pid3=="R",year+.5,year))]
overs2[,plot(x=yp,y=med,ylim=c(0,1),xlim=range(c(1956,yp)),type='n',ann=F,axes=F)]
overs2[,segments(x0=yp,x1=yp,y0=lb,y1=ub)]
overs2[,points(x=yp,y=med,pch=ifelse(pid3=="D",19,ifelse(pid3=="R",15,17)),cex=1.5,
        col=ifelse(pid3=="D","blue",ifelse(pid3=="R","red","black")))]
axis(1) ; axis(2,las=2)
title(xlab="Year",ylab="Proportion partisans more conservative than 5th R and less conservative than 95th D")
if (run == "senate") {
  legend("bottomleft",pch=c(19,15),col=c("blue","red"),legend=c("D","R"))
} else {
  legend("bottomleft",pch=c(19,15,17),col=c("blue","red","black"),legend=c("D","R","I"))
}

#
# Proportion of all variance attributable to between-party variance. Weighted
#
py.means <- DT[,list(party.mean=wtd.mean(x=x,weights=weight)),
              by=c("iter","yearparty")] # party-year means on each iter
# Calculate within-party variance on de-meaned ideal points for each iteration and year. Weighted.
variances <- NULL # Result data.table.
for (yr in sort(unique(x.meds$year))) {
  sub.dt <- DT[yearparty %in% yp[year==yr,yearparty],] # subset to cases in this year
  # Join party-year means.
  sub.dt <- merge(sub.dt,py.means,by=c("iter","yearparty"),all.x=T,all.y=F)
  # Subtract off party mean.
  sub.dt[,demean.x := x - party.mean]
  # Calculate the weighted variance by iteration.
  sub.dt <- sub.dt[,list(within.var=wtd.var(x=demean.x,weights=weight,normwt=T)),by="iter"]
  sub.dt[,year := yr]
  # Add result to variances.
  variances <- rbindlist(list(variances,sub.dt))
  rm(sub.dt)
}
if ("demean.x" %in% names(DT)) { DT[,demean.x := NULL] }# save some memory
# Calculate overall variance on all ideal points for each iteration and year. Weighted.
v.over <- NULL # Result data.table.
for (yr in sort(unique(x.meds$year))) {
  sub.dt <- DT[yearparty %in% yp[year==yr,yearparty], # subset to cases in this year
              list(overall.var=wtd.var(x=x,weights=weight,normwt=T)),by="iter"]
  sub.dt[,year := yr]
  # Add result to v.over.
  v.over <- rbindlist(list(v.over,sub.dt))
}
rm(sub.dt)
# Merge overall variances to variances.
variances <- merge(variances,v.over,by=c("iter","year"),all.x=T,all.y=F)

# Calculate between variance as difference between total variance and within variance.
variances[,between.var := overall.var - within.var]
# Proportion of total variance between-party.
variances[,p.between := between.var/overall.var]
cat("Calculated between- vs within-party variance at",paste(Sys.time()),"\n");flush.console()

# Calculate posterior median and credible intervals.
vars <- variances[,list(lb=quantile(p.between,.025),med=median(p.between),
                        ub=quantile(p.between,.975)),
                 by=c("year")] # median and sd of party-year medians.
# Plot.
vars[,plot(x=year,y=med,ylim=c(0,1),xlim=range(c(1956,year)),type='n',ann=F,axes=F)]
vars[,segments(x0=year,x1=year,y0=lb,y1=ub)]
vars[,points(x=year,y=med,pch=19,cex=1.5)]
axis(1) ; axis(2,las=2)
title(xlab="Year",ylab="Proportion of all variance attributable to between-party variance")

dev.off()
